﻿// lucene version compatibility level: 4.8.1
using ICU4N.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Core;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.IO;

namespace Lucene.Net.Collation
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements.  See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.  You may obtain a copy of the License at
     *
     *     http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Configures <see cref="KeywordTokenizer"/> with <see cref="ICUCollationAttributeFactory"/>.
    /// </summary>
    /// <remarks>
    /// Converts the token into its <see cref="CollationKey"/>, and
    /// then encodes the <see cref="CollationKey"/> either directly or with
    /// <see cref="IndexableBinaryStringTools"/> (see <a href="#version">below</a>), to allow it to
    /// be stored as an index term.
    /// <para/>
    /// <strong>WARNING:</strong> Make sure you use exactly the same <see cref="Collator"/> at
    /// index and query time -- CollationKeys are only comparable when produced by
    /// the same <see cref="Collator"/>.  <see cref="RuleBasedCollator"/>s are
    /// independently versioned, so it is safe to search against stored
    /// <see cref="CollationKey"/>s if the following are exactly the same (best practice is
    /// to store this information with the index and check that they remain the
    /// same at query time):
    /// <list type="number">
    ///     <item><description>Collator version - see <see cref="Collator"/> Version</description></item>
    ///     <item><description>The collation strength used - see <see cref="Collator.Strength"/></description></item>
    /// </list>
    /// <para/>
    /// <see cref="CollationKey"/>s generated by ICU Collators are not compatible with those
    /// generated by java.text.Collators.  Specifically, if you use
    /// <see cref="ICUCollationKeyAnalyzer"/> to generate index terms, do not use
    /// CollationKeyAnalyzer on the query side, or vice versa.
    /// <para/>
    /// ICUCollationKeyAnalyzer is significantly faster and generates significantly
    /// shorter keys than CollationKeyAnalyzer.  See
    /// <a href="http://site.icu-project.org/charts/collation-icu4j-sun"
    /// >http://site.icu-project.org/charts/collation-icu4j-sun</a> for key
    /// generation timing and key length comparisons between ICU4J and
    /// java.text.Collator over several languages.
    /// <para/>
    /// <a name="version"/>
    /// You must specify the required <see cref="LuceneVersion"/>
    /// compatibility when creating <see cref="ICUCollationKeyAnalyzer"/>:
    /// <list type="bullet">
    ///     <item><description>As of 4.0, <see cref="CollationKey"/>s are directly encoded as bytes. Previous
    ///     versions will encode the bytes with <see cref="IndexableBinaryStringTools"/>.</description></item>
    /// </list>
    /// </remarks>
    [ExceptionToClassNameConvention]
    public sealed class ICUCollationKeyAnalyzer : Analyzer
    {
        private readonly Collator collator;
        private readonly ICUCollationAttributeFactory factory;
        private readonly LuceneVersion matchVersion;

        /// <summary>
        /// Create a new <see cref="ICUCollationKeyAnalyzer"/>, using the specified <paramref name="collator"/>.
        /// </summary>
        /// <param name="matchVersion">See <see cref="ICUCollationKeyAnalyzer"/>.</param>
        /// <param name="collator"><see cref="CollationKey"/> generator.</param>
        /// <exception cref="ArgumentNullException"><paramref name="collator"/> is <c>null</c>.</exception>
        public ICUCollationKeyAnalyzer(LuceneVersion matchVersion, Collator collator)
        {
            // Fail fast: a null collator cannot produce collation keys, so reject it
            // here instead of letting the failure surface later, far from the call site.
            if (collator is null)
            {
                throw new ArgumentNullException(nameof(collator));
            }

            this.matchVersion = matchVersion;
            this.collator = collator;
            this.factory = new ICUCollationAttributeFactory(collator);
        }

        /// <summary>
        /// Create a new <see cref="ICUCollationKeyAnalyzer"/>, using the specified <paramref name="collator"/>
        /// and <see cref="LuceneVersion.LUCENE_31"/> as the match version.
        /// </summary>
        /// <param name="collator"><see cref="CollationKey"/> generator.</param>
        /// <exception cref="ArgumentNullException"><paramref name="collator"/> is <c>null</c>.</exception>
        [Obsolete("Use ICUCollationKeyAnalyzer.ICUCollationKeyAnalyzer(LuceneVersion, Collator) and specify a version instead. This ctor will be removed in Lucene 5.0")]
        public ICUCollationKeyAnalyzer(Collator collator)
            : this(LuceneVersion.LUCENE_31, collator)
        {
        }

        /// <summary>
        /// Creates the <see cref="TokenStreamComponents"/> for this analyzer, choosing the
        /// construction path based on the match version supplied at construction.
        /// </summary>
        /// <param name="fieldName">Name of the field being analyzed; not used by this implementation.</param>
        /// <param name="reader">The reader handed to the <see cref="KeywordTokenizer"/>.</param>
        /// <returns>
        /// For match versions on or after <see cref="LuceneVersion.LUCENE_40"/>, components whose
        /// source and result are the same <see cref="KeywordTokenizer"/> built with the
        /// <see cref="ICUCollationAttributeFactory"/>; otherwise, a plain <see cref="KeywordTokenizer"/>
        /// wrapped in an <see cref="ICUCollationKeyFilter"/>.
        /// </returns>
        protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
        {
#pragma warning disable 612, 618
            if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_40))
#pragma warning restore 612, 618
            {
                // 4.0 and later: the tokenizer is created with the collation attribute factory
                // and is used as both the source and the end of the chain (no extra filter).
                KeywordTokenizer tokenizer = new KeywordTokenizer(factory, reader, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
                return new TokenStreamComponents(tokenizer, tokenizer);
            }
            else
            {
                // Pre-4.0 compatibility: a default tokenizer followed by the (obsolete)
                // collation key filter, which is why the obsolete warnings are suppressed.
                KeywordTokenizer tokenizer = new KeywordTokenizer(reader);
                return new TokenStreamComponents(tokenizer,
#pragma warning disable 612, 618
                    new ICUCollationKeyFilter(tokenizer, collator));
#pragma warning restore 612, 618
            }
        }
    }
}
